This notebooks will help me talk through a bit of material regarding spatial data, with some introduction to spatial joins.

Key topics for today:

Packages

Standards:

library(knitr)
library(tidyverse)
library(janitor)
library(lubridate) # because we will probably see some dates
library(here) # a package I haven't taught you about before that doesn't do much, but ....

Some additional packages focused on today’s work:

library(sf) # working with simple features - geospatial
library(tmap)
library(tidycensus)

Informational resources

Using the Neighborhood Geospatial Data (using /data)

Our first data source comes from opendata.dc

https://opendata.dc.gov/datasets/DCGIS::dc-health-planning-neighborhoods/about

I will use the GeoJSON file. (Newer, not necessarily better, but … a single file. Not smaller, but … this one is not big.)

Data is easily readable

neigh=st_read(here("DC_Health_Planning_Neighborhoods_joey.geojson")) %>% clean_names()
Reading layer `DC_Health_Planning_Neighborhoods' from data source 
  `U:\ds241\ds241_f22\analysis\work\DC_Health_Planning_Neighborhoods_joey.geojson' using driver `GeoJSON'
Simple feature collection with 51 features and 8 fields
Geometry type: POLYGON
Dimension:     XY
Bounding box:  xmin: -77.11976 ymin: 38.79165 xmax: -76.9094 ymax: 38.99556
Geodetic CRS:  WGS 84
class(neigh)
[1] "sf"         "data.frame"
plot(neigh)

Reminder - Joins

df1=tibble(fruit=c("apple","banana","cherry"),cost=c(1.5,1.2,2.25))
df2=tibble(fruit=c("apple","apple","cherry","lemon"),
           desert=c("pie","cobbler","cobbler","cheesecake"),
           cal=c(400,430,500,550))
df1
df2
left_join(df1,df2,by="fruit")

Investigating joining spatial and non-spatial data

Covid case information is available from opendatadc:

https://opendata.dc.gov/datasets/DCGIS::dc-covid-19-total-positive-cases-by-neighborhood/about

Read cases information:

df_c=read_csv(here("DC_COVID-19_Total_Positive_Cases_by_Neighborhood_joey.csv")) %>% clean_names() 
Rows: 26372 Columns: 4
── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (2): DATE_REPORTED, NEIGHBORHOOD
dbl (2): OBJECTID, TOTAL_POSITIVES

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
df_cases=df_c %>%
  filter(as_date(date_reported) == "2022-02-22") %>% 
  separate(neighborhood,into=c("code","name"),sep = ":") %>%
  mutate(code=case_when(
    code=="N35" ~"N0",
    TRUE ~ code
  )) %>%
  select(-objectid,-date_reported)

Regular joining (of dataframes)

neigh2=left_join(neigh,df_cases,by=c("code")) 

tmap_mode("view")
tmap mode set to interactive viewing
tm_shape(neigh2) +tm_polygons("total_positives",alpha=.5)

Joining with other spatial data

Let’s get some data using tidycensus. Need an API key https://api.census.gov/data/key_signup.html



census_api_key("d44395e2fa101f82260fae6b845676d71f017b70")
To install your API key for use in future sessions, run this function with `install = TRUE`.
#what variables
v20 = load_variables(2018,"acs5")
# median_family_income="    B06011_001" 
# all "B00001_001"  
#black "B02009_001"

Get some data:

df_cencus=get_acs(geography = "tract",
                  variables=c("median_inc"="B06011_001",
                              "pop"="B01001_001",
                              "pop_black"="B02009_001"),
                  state="DC",geometry=TRUE,year=2018) 
Getting data from the 2014-2018 5-year ACS
Downloading feature geometry from the Census website.  To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
Using FIPS code '11' for state 'DC'

  |                                                                                                                       
  |                                                                                                                 |   0%
  |                                                                                                                       
  |==============================================                                                                   |  41%
  |                                                                                                                       
  |=======================================================================                                          |  63%
  |                                                                                                                       
  |=================================================================================================================| 100%
class(df_cencus)
[1] "sf"         "data.frame"
plot(df_cencus)

It’s in long format. Let’s make it wide.

df_cens=df_cencus %>% select(-moe) %>% spread(variable,estimate) 

tm_shape(df_cens) +tm_polygons("median_inc",alpha=.5)

  tm_shape(neigh2) +tm_borders(col="blue",lwd=5,alpha=.2)+
  tm_shape(df_cens) +tm_borders(col="red",lwd=1,alpha=.3)
#<<<<<<< HEAD
#df_j=st_join(df_cens,neigh2)
#=======
#df_j=st_join(df_cens,neigh2,prepared=FALSE)
#>>>>>>> aaf01be5cf721819dd2df615aef7a1999bcec0c2
df_cens_adj=df_cens %>% st_transform(4326)
df_j=st_join(df_cens_adj,neigh2,largest=TRUE)
Warning: attribute variables are assumed to be spatially constant throughout all geometries

Other order?:

#<<<<<<< HEAD
##df_j_rev = st_join(neigh2,df_cens_adj,largest=TRUE)
#=======
#df_j_rev = st_join(neigh2,df_cens_adj,largest=TRUE)
#>>>>>>> aaf01be5cf721819dd2df615aef7a1999bcec0c2

Since we want the geometry for the NEIGHBORHOODS, we need a to work a little harder:

df1=df_j %>% select(median_inc,pop,pop_black,objectid) %>%
  group_by(objectid) %>%
  summarise(pop_n=sum(pop),
            pop_black_n=sum(pop_black), 
            adj_median_income=sum(pop*median_inc)/pop_n) 

plot(df1)

#df2=left_join(neigh2,df1)

df2=left_join(neigh2,df1 %>% st_set_geometry(NULL))
Joining, by = "objectid"
df2=df2 %>% mutate(black_perc=pop_black_n/pop_n, covid_rate=total_positives/pop_n)
tm_shape(df2)+tm_polygons(c("adj_median_income","covid_rate","black_perc"))
df2 %>% filter(objectid!=30) %>% tm_shape()+tm_polygons(c("adj_median_income","covid_rate","black_perc"),alpha=.4)
LS0tDQp0aXRsZTogIlJlcGxhY2VtZW50IENsYXNzIC0gQ2xlYW5pbmcgdXAgYW5kIFNwYXRpYWwgSm9pbnMiDQpkYXRlOiAgIjIwMjItMTEtMDkiDQphdXRob3I6ICJDb2FjaCBTa3VmY2EiDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQpUaGlzIG5vdGVib29rcyB3aWxsIGhlbHAgbWUgdGFsayB0aHJvdWdoIGEgYml0IG9mIG1hdGVyaWFsIHJlZ2FyZGluZyAqc3BhdGlhbCBkYXRhKiwgDQp3aXRoIHNvbWUgaW50cm9kdWN0aW9uIHRvIGBzcGF0aWFsIGpvaW5zYC4NCg0KS2V5IHRvcGljcyBmb3IgdG9kYXk6DQoNCiogVXNpbmcgYHNmYCBwYWNrYWdlDQoqIFVzaW5nIGB0bWFwYCBwYWNrYWdlDQoqIFVzaW5nIGB0aWR5Y2Vuc3VzYCBwYWNrYWdlDQoqIFJlbWluZGVyIG9uIGpvaW5zIChmb2N1czogbGVmdCBqb2luKQ0KKiBPdXIgU3BhdGlhbCBEYXRhDQogICAqIE5laWdoYm9yaG9vZHMNCiAgICogSm9pbmluZyB3aXRoIG5vbi1zcGF0aWFsIGRhdGENCiAgICogQ2Vuc3VzIGRhdGENCiAgICogSm9pbmluZyB3aXRoIHNwYXRpYWwgZGF0YQ0KKiBpZ25vcmUgaHRtbCBpbiBnaXQgIChpZiB0aW1lKQ0KDQojIyBQYWNrYWdlcw0KDQpTdGFuZGFyZHM6DQoNCmBgYHtyfQ0KbGlicmFyeShrbml0cikNCmxpYnJhcnkodGlkeXZlcnNlKQ0KbGlicmFyeShqYW5pdG9yKQ0KbGlicmFyeShsdWJyaWRhdGUpICMgYmVjYXVzZSB3ZSB3aWxsIHByb2JhYmx5IHNlZSBzb21lIGRhdGVzDQpsaWJyYXJ5KGhlcmUpICMgYSBwYWNrYWdlIEkgaGF2ZW4ndCB0YXVnaHQgeW91IGFib3V0IGJlZm9yZSB0aGF0IGRvZXNuJ3QgZG8gbXVjaCwgYnV0IC4uLi4NCmBgYA0KDQpTb21lIGFkZGl0aW9uYWwgcGFja2FnZXMgZm9jdXNlZCBvbiB0b2RheSdzIHdvcms6DQoNCmBgYHtyfQ0KbGlicmFyeShzZikgIyB3b3JraW5nIHdpdGggc2ltcGxlIGZlYXR1cmVzIC0gZ2Vvc3BhdGlhbA0KbGlicmFyeSh0bWFwKQ0KbGlicmFyeSh0aWR5Y2Vuc3VzKQ0KDQpgYGANCiMjIEluZm9ybWF0aW9uYWwgcmVzb3VyY2VzDQoNCiogQW4gb3ZlcmFsbCByZXNvdXJjZSBvbiBtYXBwaW5nIGluIFI6IGh0dHBzOi8vYm9va2Rvd24ub3JnL25pY29oYWhuL21ha2luZ19tYXBzX3dpdGhfcjUvZG9jcy9pbnRyb2R1Y3Rpb24uaHRtbA0KKiBBIHN0YXJ0aW5nIHBvaW50IHRvIGxlYXJuIGFib3V0IGBzZmA6ICBodHRwczovL3Itc3BhdGlhbC5naXRodWIuaW8vc2YvYXJ0aWNsZXMvDQoqIEdldHRpbmcgc3RhcnRlZCB3aXRoIGB0bWFwYDogaHR0cHM6Ly9jcmFuLnItcHJvamVjdC5vcmcvd2ViL3BhY2thZ2VzL3RtYXAvdmlnbmV0dGVzL3RtYXAtZ2V0c3RhcnRlZC5odG1sDQoqIFRoZSBgdGlkeWNlbnN1c2AgcGFja2FnZTogaHR0cHM6Ly93YWxrZXItZGF0YS5jb20vdGlkeWNlbnN1cy9pbmRleC5odG1sDQoqIFRoZSBib29rIG9uIGB0aWR5Y2Vuc3VzYCA6IGh0dHBzOi8vd2Fsa2VyLWRhdGEuY29tL2NlbnN1cy1yL2luZGV4Lmh0bWwNCg0KDQojIyBVc2luZyB0aGUgTmVpZ2hib3Job29kIEdlb3NwYXRpYWwgRGF0YSAodXNpbmcgL2RhdGEpDQoNCg0KT3VyIGZpcnN0IGRhdGEgc291cmNlIGNvbWVzIGZyb20gb3BlbmRhdGEuZGMNCg0KaHR0cHM6Ly9vcGVuZGF0YS5kYy5nb3YvZGF0YXNldHMvRENHSVM6OmRjLWhlYWx0aC1wbGFubmluZy1uZWlnaGJvcmhvb2RzL2Fib3V0DQoNCg0KSSB3aWxsIHVzZSB0aGUgR2VvSlNPTiBmaWxlLiAgKE5ld2VyLCBub3QgbmVjZXNzYXJpbHkgYmV0dGVyLCBidXQgLi4uIGEgc2luZ2xlIGZpbGUuICBOb3Qgc21hbGxlciwgYnV0IC4uLiB0aGlzIG9uZSBpcyBub3QgYmlnLikgIA0KDQoNCg0KDQpEYXRhIGlzIGVhc2lseSByZWFkYWJsZSANCmBgYHtyfQ0KbmVpZ2g9c3RfcmVhZChoZXJlKCJEQ19IZWFsdGhfUGxhbm5pbmdfTmVpZ2hib3Job29kc19qb2V5Lmdlb2pzb24iKSkgJT4lIGNsZWFuX25hbWVzKCkNCmNsYXNzKG5laWdoKQ0KYGBgDQoNCmBgYHtyfQ0KcGxvdChuZWlnaCkNCmBgYA0KDQoNCg0KIyMgUmVtaW5kZXIgLSBKb2lucw0KDQpgYGB7cn0NCmRmMT10aWJibGUoZnJ1aXQ9YygiYXBwbGUiLCJiYW5hbmEiLCJjaGVycnkiKSxjb3N0PWMoMS41LDEuMiwyLjI1KSkNCmRmMj10aWJibGUoZnJ1aXQ9YygiYXBwbGUiLCJhcHBsZSIsImNoZXJyeSIsImxlbW9uIiksDQogICAgICAgICAgIGRlc2VydD1jKCJwaWUiLCJjb2JibGVyIiwiY29iYmxlciIsImNoZWVzZWNha2UiKSwNCiAgICAgICAgICAgY2FsPWMoNDAwLDQzMCw1MDAsNTUwKSkNCmRmMQ0KYGBgDQpgYGB7cn0NCmRmMg0KYGBgDQpgYGB7cn0NCmxlZnRfam9pbihkZjEsZGYyLGJ5PSJmcnVpdCIpDQpgYGANCg0KIyMgSW52ZXN0aWdhdGluZyBqb2luaW5nIHNwYXRpYWwgYW5kIG5vbi1zcGF0aWFsIGRhdGENCg0KDQpDb3ZpZCBjYXNlIGluZm9ybWF0aW9uIGlzIGF2YWlsYWJsZSBmcm9tIG9wZW5kYXRhZGM6DQoNCmh0dHBzOi8vb3BlbmRhdGEuZGMuZ292L2RhdGFzZXRzL0RDR0lTOjpkYy1jb3ZpZC0xOS10b3RhbC1wb3NpdGl2ZS1jYXNlcy1ieS1uZWlnaGJvcmhvb2QvYWJvdXQNCg0KUmVhZCBjYXNlcyBpbmZvcm1hdGlvbjoNCg0KYGBge3IgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRX0NCmRmX2M9cmVhZF9jc3YoaGVyZSgiRENfQ09WSUQtMTlfVG90YWxfUG9zaXRpdmVfQ2FzZXNfYnlfTmVpZ2hib3Job29kX2pvZXkuY3N2IikpICU+JSBjbGVhbl9uYW1lcygpIA0KDQpkZl9jYXNlcz1kZl9jICU+JQ0KICBmaWx0ZXIoYXNfZGF0ZShkYXRlX3JlcG9ydGVkKSA9PSAiMjAyMi0wMi0yMiIpICU+JSANCiAgc2VwYXJhdGUobmVpZ2hib3Job29kLGludG89YygiY29kZSIsIm5hbWUiKSxzZXAgPSAiOiIpICU+JQ0KICBtdXRhdGUoY29kZT1jYXNlX3doZW4oDQogICAgY29kZT09Ik4zNSIgfiJOMCIsDQogICAgVFJVRSB+IGNvZGUNCiAgKSkgJT4lDQogIHNlbGVjdCgtb2JqZWN0aWQsLWRhdGVfcmVwb3J0ZWQpDQoNCg0KYGBgDQoNCg0KIyMgUmVndWxhciBqb2luaW5nIChvZiBkYXRhZnJhbWVzKQ0KDQpgYGB7cn0NCm5laWdoMj1sZWZ0X2pvaW4obmVpZ2gsZGZfY2FzZXMsYnk9YygiY29kZSIpKSANCg0KdG1hcF9tb2RlKCJ2aWV3IikNCg0KdG1fc2hhcGUobmVpZ2gyKSArdG1fcG9seWdvbnMoInRvdGFsX3Bvc2l0aXZlcyIsYWxwaGE9LjUpDQpgYGANCg0KDQojIyBKb2luaW5nIHdpdGggb3RoZXIgc3BhdGlhbCBkYXRhDQoNCkxldCdzIGdldCBzb21lIGRhdGEgdXNpbmcgYHRpZHljZW5zdXNgLiAgTmVlZCBhbiBBUEkga2V5ICAgaHR0cHM6Ly9hcGkuY2Vuc3VzLmdvdi9kYXRhL2tleV9zaWdudXAuaHRtbA0KDQoNCmBgYHtyfQ0KDQoNCmNlbnN1c19hcGlfa2V5KCJkNDQzOTVlMmZhMTAxZjgyMjYwZmFlNmI4NDU2NzZkNzFmMDE3YjcwIikNCg0KI3doYXQgdmFyaWFibGVzDQp2MjAgPSBsb2FkX3ZhcmlhYmxlcygyMDE4LCJhY3M1IikNCiMgbWVkaWFuX2ZhbWlseV9pbmNvbWU9IglCMDYwMTFfMDAxIiANCiMgYWxsICJCMDAwMDFfMDAxIgkNCiNibGFjayAiQjAyMDA5XzAwMSINCmBgYA0KDQoNCkdldCBzb21lIGRhdGE6DQoNCmBgYHtyfQ0KZGZfY2VuY3VzPWdldF9hY3MoZ2VvZ3JhcGh5ID0gInRyYWN0IiwNCiAgICAgICAgICAgICAgICAgIHZhcmlhYmxlcz1jKCJtZWRpYW5faW5jIj0iQjA2MDExXzAwMSIsDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAicG9wIj0iQjAxMDAxXzAwMSIsDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAicG9wX2JsYWNrIj0iQjAyMDA5XzAwMSIpLA0KICAgICAgICAgICAgICAgICAgc3RhdGU9IkRDIixnZW9tZXRyeT1UUlVFLHllYXI9MjAxOCkgDQpgYGANCg0KYGBge3J9DQpjbGFzcyhkZl9jZW5jdXMpDQpwbG90KGRmX2NlbmN1cykNCmBgYA0KSXQncyBpbiBsb25nIGZvcm1hdC4gIExldCdzIG1ha2UgaXQgd2lkZS4NCmBgYHtyfQ0KZGZfY2Vucz1kZl9jZW5jdXMgJT4lIHNlbGVjdCgtbW9lKSAlPiUgc3ByZWFkKHZhcmlhYmxlLGVzdGltYXRlKSANCg0KdG1fc2hhcGUoZGZfY2VucykgK3RtX3BvbHlnb25zKCJtZWRpYW5faW5jIixhbHBoYT0uNSkNCmBgYA0KDQoNCmBgYHtyfQ0KDQogIHRtX3NoYXBlKG5laWdoMikgK3RtX2JvcmRlcnMoY29sPSJibHVlIixsd2Q9NSxhbHBoYT0uMikrDQogIHRtX3NoYXBlKGRmX2NlbnMpICt0bV9ib3JkZXJzKGNvbD0icmVkIixsd2Q9MSxhbHBoYT0uMykNCmBgYA0KDQoNCg0KYGBge3J9DQojPDw8PDw8PCBIRUFEDQojZGZfaj1zdF9qb2luKGRmX2NlbnMsbmVpZ2gyKQ0KIz09PT09PT0NCiNkZl9qPXN0X2pvaW4oZGZfY2VucyxuZWlnaDIscHJlcGFyZWQ9RkFMU0UpDQojPj4+Pj4+PiBhYWYwMWJlNWNmNzIxODE5ZGQyZGY2MTVhZWY3YTE5OTliY2VjMGMyDQpgYGANCg0KYGBge3J9DQpkZl9jZW5zX2Fkaj1kZl9jZW5zICU+JSBzdF90cmFuc2Zvcm0oNDMyNikNCmBgYA0KDQpgYGB7cn0NCmRmX2o9c3Rfam9pbihkZl9jZW5zX2FkaixuZWlnaDIsbGFyZ2VzdD1UUlVFKQ0KYGBgDQpPdGhlciBvcmRlcj86DQoNCmBgYHtyfQ0KIzw8PDw8PDwgSEVBRA0KIyNkZl9qX3JldiA9IHN0X2pvaW4obmVpZ2gyLGRmX2NlbnNfYWRqLGxhcmdlc3Q9VFJVRSkNCiM9PT09PT09DQojZGZfal9yZXYgPSBzdF9qb2luKG5laWdoMixkZl9jZW5zX2FkaixsYXJnZXN0PVRSVUUpDQojPj4+Pj4+PiBhYWYwMWJlNWNmNzIxODE5ZGQyZGY2MTVhZWY3YTE5OTliY2VjMGMyDQpgYGANCg0KU2luY2Ugd2Ugd2FudCB0aGUgZ2VvbWV0cnkgZm9yIHRoZSBORUlHSEJPUkhPT0RTLCB3ZSBuZWVkIGEgdG8gd29yayBhIGxpdHRsZSBoYXJkZXI6DQoNCmBgYHtyfQ0KZGYxPWRmX2ogJT4lIHNlbGVjdChtZWRpYW5faW5jLHBvcCxwb3BfYmxhY2ssb2JqZWN0aWQpICU+JQ0KICBncm91cF9ieShvYmplY3RpZCkgJT4lDQogIHN1bW1hcmlzZShwb3Bfbj1zdW0ocG9wKSwNCiAgICAgICAgICAgIHBvcF9ibGFja19uPXN1bShwb3BfYmxhY2spLCANCiAgICAgICAgICAgIGFkal9tZWRpYW5faW5jb21lPXN1bShwb3AqbWVkaWFuX2luYykvcG9wX24pIA0KDQpwbG90KGRmMSkNCmBgYA0KDQpgYGB7cn0NCiNkZjI9bGVmdF9qb2luKG5laWdoMixkZjEpDQoNCmRmMj1sZWZ0X2pvaW4obmVpZ2gyLGRmMSAlPiUgc3Rfc2V0X2dlb21ldHJ5KE5VTEwpKQ0KDQpgYGANCg0KYGBge3J9DQpkZjI9ZGYyICU+JSBtdXRhdGUoYmxhY2tfcGVyYz1wb3BfYmxhY2tfbi9wb3BfbiwgY292aWRfcmF0ZT10b3RhbF9wb3NpdGl2ZXMvcG9wX24pDQp0bV9zaGFwZShkZjIpK3RtX3BvbHlnb25zKGMoImFkal9tZWRpYW5faW5jb21lIiwiY292aWRfcmF0ZSIsImJsYWNrX3BlcmMiKSkNCmBgYA0KDQoNCg0KYGBge3J9DQpkZjIgJT4lIGZpbHRlcihvYmplY3RpZCE9MzApICU+JSB0bV9zaGFwZSgpK3RtX3BvbHlnb25zKGMoImFkal9tZWRpYW5faW5jb21lIiwiY292aWRfcmF0ZSIsImJsYWNrX3BlcmMiKSxhbHBoYT0uNCkNCmBgYA0KDQoNCg==